1 Setup

2 Environment

Set the working dir

setwd("/mnt/picea/home/ishutava/giovanna")

Libs

suppressPackageStartupMessages(library(DEXSeq))
suppressPackageStartupMessages(library(VennDiagram))
suppressPackageStartupMessages(library(RColorBrewer))
suppressPackageStartupMessages(library(gplots))

Helpers

source("~/Git/UPSCb/src/R/volcanoPlot.R")
## Loading required package: limma
## 
## Attaching package: 'limma'
## The following object is masked from 'package:DEXSeq':
## 
##     plotMA
## The following object is masked from 'package:DESeq2':
## 
##     plotMA
## The following object is masked from 'package:BiocGenerics':
## 
##     plotMA
#source("~/Git/UPSCb/src/R/plotMA.R")

Graphics

pal <- brewer.pal(8,"Dark2")
hpal <- colorRampPalette(colors = c("blue","white","red"))(100)

Data

flattenedFile <-  "/mnt/picea/storage/reference/Arabidopsis-thaliana/TAIR10/gff/TAIR10_GFF3_MY.gff"
samples <- read.csv("~/Git/UPSCb/projects/arabidopsis-porcupine-RNA-Seq/doc/RNA-seq_pcp_vs_Col-0_info.csv")
samples$genotype <- sub(" .*","",samples$Description)

#basename(flattenedFile)

Function

"run" <- function(inDir,metadata){
  countFiles_23 = list.files(inDir, pattern=".txt$", full.names=TRUE)
  #'basename(countFiles_23)
  
  condition <- factor(samples[match(sub("_.*","",basename(countFiles_23)),
                samples$SampleID),metadata])
  
  sampleTable_23 = data.frame(
    row.names = basename(countFiles_23),
    condition = condition) 
  
  dxd_23 = DEXSeqDataSetFromHTSeq(
    countFiles_23,
    sampleData=sampleTable_23,
    design= ~ sample + exon + condition:exon,
    flattenedfile=flattenedFile )
  
  dds_23 <- dxd_23

  #------- Deleting rows with all 0's:
  ts_23 <- counts(dds_23)
  sel_23 <- !rowSums(ts_23 == 0) == ncol(ts_23)
  dds_23 <- dds_23[sel_23,]
  dxd_23 = estimateSizeFactors( dds_23)
  
  # Dispersion estimation:
  dxd_23 = estimateDispersions( dxd_23 )
  plotDispEsts( dxd_23 )
  
  # Testing for differential exon usage:
  dxd_23 = testForDEU( dxd_23 )
  dxd_23 = estimateExonFoldChanges( dxd_23, fitExpToVar="condition")
  
  # Showing the results:
  dxr3 = DEXSeqResults( dxd_23 ) #'#' 16 and 23 degrees for pcp's
  names(dxr3)[names(dxr3)==grep("log2fold",colnames(dxr3))] <- "log2FoldChange"
  # dxr2 = DEXSeqResults( dxd_23 ) #'#' 16 and 23 degrees for col's
  # dxr1_23 = DEXSeqResults( dxd_23 ) #'#' 23 degrees WT and mutants
  #  mcols(dxr3)$description
  
  #table ( dxr3$padj < 0.1 )
# plotMA( dxr3, cex=0.8 )
#  plotDEXSeq( dxr3, "AT2G18740", legend=TRUE, cex.axis=1.2, cex=1.3,
#              lwd=2 )
#  plotDEXSeq( dxr3, "AT2G18740", displayTranscripts=TRUE, legend=TRUE,
#              cex.axis=1.2, cex=1.3, lwd=2 )
#  plotDEXSeq( dxr3, "AT2G18740", expression=FALSE, norCounts=TRUE,
#              legend=TRUE, cex.axis=1.2, cex=1.3, lwd=2 )
  
  #my_sel_pcp = matrix(c(dxr3[,1], dxr3[,8]/dxr3[,9], dxr3[,9]/dxr3[,8]), nrow = nrow(dxr3), ncol = 3)
  #my_sel_col = matrix(c(dxr2[,1], dxr2[,8]/dxr2[,9], dxr2[,9]/dxr2[,8]), nrow = nrow(dxr2), ncol = 3)
  #my_sel_23 = matrix(c(dxr1_23[,1], dxr1_23[,8]/dxr1_23[,9], dxr1_23[,9]/dxr1_23[,8]), nrow = nrow(dxr1_23), ncol = 3)
  
  #volcanoPlot(dxr3)
  return(dxr3)
    
}

3 Process

## 16 Pcp vs. Col0
dxr1 <- run("16","genotype")
#volcanoPlot(dxr1)

#' ## 23 Pcp vs. Col0
dxr1_23 <- run("23","genotype")
#volcanoPlot(dxr1_23)

#' ## Col0 23 vs. 16
dxr2 <- run("col","Temperature.at.collection")
#volcanoPlot(dxr2)

#' ## PCP 23 vs. 16
dxr3 <- run("pcp","Temperature.at.collection")
#volcanoPlot(dxr3)

4 Compare

ol_16_23 <- VennDiagram::calculate.overlap(list(rownames(dxr1)[dxr1$padj < 0.01 & 
                                                                 abs(dxr1$log2FoldChange) >= 0.5 &
                                                                 !is.na(dxr1$padj) &
                                                                 !is.na(dxr1$log2FoldChange)],
                                                rownames(dxr1_23)[dxr1_23$padj < 0.01 &
                                                                    abs(dxr1_23$log2FoldChange) >= 0.5 &
                                                                    !is.na(dxr1_23$padj) &
                                                                    !is.na(dxr1_23$log2FoldChange)]))
ol_col_pcp <- VennDiagram::calculate.overlap(list(rownames(dxr2)[dxr2$padj < 0.01 & 
                                                                   abs(dxr2$log2FoldChange) >= 0.5 &
                                                                   !is.na(dxr2$padj) &
                                                                   !is.na(dxr2$log2FoldChange)],
                                                  rownames(dxr3)[dxr3$padj < 0.01 &
                                                                   abs(dxr3$log2FoldChange) >= 0.5 &
                                                                   !is.na(dxr3$padj) &
                                                                   !is.na(dxr3$log2FoldChange)]))

4.1 Plot

4.1.1 16 - 23

grid.newpage()
draw.pairwise.venn(length(ol_16_23$a1), length(ol_16_23$a2), length(ol_16_23$a3),
                   c("16 degree", "23 degree"), fill = rainbow(2), alpha = 0.6)

## (polygon[GRID.polygon.11], polygon[GRID.polygon.12], polygon[GRID.polygon.13], polygon[GRID.polygon.14], text[GRID.text.15], text[GRID.text.16], text[GRID.text.17], lines[GRID.lines.18], text[GRID.text.19], text[GRID.text.20])

4.1.2 col - pcp

grid.newpage()
draw.pairwise.venn(length(ol_col_pcp$a1), length(ol_col_pcp$a2), length(ol_col_pcp$a3),
                   c("col", "pcp"), fill = rainbow(2), alpha = 0.6)

## (polygon[GRID.polygon.21], polygon[GRID.polygon.22], polygon[GRID.polygon.23], polygon[GRID.polygon.24], text[GRID.text.25], text[GRID.text.26], lines[GRID.lines.27], text[GRID.text.28], lines[GRID.lines.29], text[GRID.text.30], text[GRID.text.31])

4.2 Export

write.table(ol_16_23$a3, file = "/mnt/picea/home/ishutava/giovanna/table_16_23.csv", append = TRUE, quote = TRUE, sep = " ",
            eol = "\n", na = "NA", dec = ".", row.names = TRUE,
            col.names = TRUE, qmethod = c("escape", "double"),
            fileEncoding = "")
## Warning in write.table(ol_16_23$a3, file = "/mnt/picea/home/ishutava/
## giovanna/table_16_23.csv", : appending column names to file
write.table(ol_col_pcp$a3,file = "/mnt/picea/home/ishutava/giovanna/table_col_pcp.csv", append = TRUE, quote = TRUE, sep = " ",
            eol = "\n", na = "NA", dec = ".", row.names = TRUE,
            col.names = TRUE, qmethod = c("escape", "double"),
            fileEncoding = "")
## Warning in write.table(ol_col_pcp$a3, file = "/mnt/picea/home/ishutava/
## giovanna/table_col_pcp.csv", : appending column names to file
write.table(as.matrix(ol_16_23), file = "/mnt/picea/home/ishutava/giovanna/table_16_23_1.csv", append = TRUE, quote = TRUE, sep = " ",
            eol = "\n", na = "NA", dec = ".", row.names = TRUE,
            col.names = TRUE, qmethod = c("escape", "double"),
            fileEncoding = "")
## Warning in write.table(as.matrix(ol_16_23), file = "/mnt/picea/home/
## ishutava/giovanna/table_16_23_1.csv", : appending column names to file
write.table(as.matrix(ol_col_pcp),file = "/mnt/picea/home/ishutava/giovanna/table_col_pcp_1.csv", append = TRUE, quote = TRUE, sep = " ",          
            eol = "\n", na = "NA", dec = ".", row.names = TRUE,
            col.names = TRUE, qmethod = c("escape", "double"),
            fileEncoding = "")
## Warning in write.table(as.matrix(ol_col_pcp), file = "/mnt/picea/home/
## ishutava/giovanna/table_col_pcp_1.csv", : appending column names to file
save(ol_16_23, ol_col_pcp, dxr1_23, dxr1, dxr2, dxr3, file="/mnt/picea/home/ishutava/giovanna/var.RData")

4.3 More plots

for (i in 1:length(ol_16_23$a3)){
  
  plotDEXSeq( dxr1, substr(ol_16_23$a3[i],1,regexpr(':', ol_16_23$a3[i])-1), expression=FALSE, norCounts=TRUE,
              legend=TRUE, cex.axis=1.2, cex=1.3, lwd=2 )
  
  plotDEXSeq( dxr1_23, substr(ol_16_23$a3[i],1,regexpr(':', ol_16_23$a3[i])-1), expression=FALSE, norCounts=TRUE,
              legend=TRUE, cex.axis=1.2, cex=1.3, lwd=2 )
}

for (i in 1:length(ol_col_pcp$a3)){
  
  plotDEXSeq( dxr2, substr(ol_col_pcp$a3[i],1,regexpr(':', ol_col_pcp$a3[i])-1), expression=FALSE, norCounts=TRUE,
              legend=TRUE, cex.axis=1.2, cex=1.3, lwd=2 )
  
  plotDEXSeq( dxr3, substr(ol_col_pcp$a3[i],1,regexpr(':', ol_col_pcp$a3[i])-1), expression=FALSE, norCounts=TRUE,
              legend=TRUE, cex.axis=1.2, cex=1.3, lwd=2 )
}

4.4 Lookup

——– Maximum of the -log10(dxr1$padj ———– “AT1G74870:E001”

s <- !is.na(-log10(dxr1$padj))
rt1<- rownames(dxr1)[-log10(dxr1$padj[s]) == (max(-log10(dxr1$padj[s])))]

“AT2G31902:E004”

s_23 <- !is.na(-log10(dxr1_23$padj))
rt1_23 <- rownames(dxr1_23)[-log10(dxr1_23$padj[s_23]) == (max(-log10(dxr1_23$padj[s_23])))]

4.5 All Overlap

tx16 <- dxr1$transcripts[
  dxr1$padj < 0.01 & 
    abs(dxr1$log2FoldChange) >= 0.5 &
    !is.na(dxr1$padj)]

tx23 <- dxr1_23$transcripts[
  dxr1_23$padj < 0.01 & 
    abs(dxr1_23$log2FoldChange) >= 0.5 &
    !is.na(dxr1_23$padj)]

txcol <- dxr2$transcripts[
  dxr2$padj < 0.01 & 
    abs(dxr2$log2FoldChange) >= 0.5 &
    !is.na(dxr2$padj)]

txpcp <- dxr3$transcripts[
  dxr3$padj < 0.01 & 
    abs(dxr3$log2FoldChange) >= 0.5 &
    !is.na(dxr3$padj)]

plot.new()
grid.draw(venn.diagram(list(
  t16=tx16,t23=tx23,col=txcol,pcp=txpcp),
  filename=NULL,
  col=pal[1:4],
  category.names=c("t16","t23","col","pcp")))

Define the cutoffs

padj <- 0.01
lfc <- 0.5

extract the data

dl <- lapply(list(dxr1,dxr1_23,dxr2,dxr3),function(d){
  d <- d[d$padj < 0.01 & 
           abs(d$log2FoldChange) >= 0.5 &
           !is.na(d$padj) &
           !is.na(d$log2FoldChange),]
})

verify

plot.new()
grid.draw(venn.diagram(lapply(dl,rownames),
                       filename=NULL,
                       col=pal[1:4],
                       category.names=c("t16","t23","col","pcp")))

extract the exon of interest

eoi <- unique(unlist(lapply(dl,rownames)))

sub select

ds <- lapply(list(dxr1,dxr1_23,dxr2,dxr3), function(d,eoi){
  d[match(eoi,rownames(d)),c(8,9)]
},eoi)

Combine into a df

df <- as.data.frame(do.call(cbind,ds))
colnames(df) <- c("Col0.16","pcp.16",
                  "Col0.23","pcp.23",
                  "16.Col0","23.Col0",
                  "16.pcp","23.pcp"
                  )

replace NAs

df[is.na(df)] <- 0

PCA

pc <- prcomp(t(df))
percentage <- round(summary(pc)$importance[2,]*100,digits=2)
plot(pc$x[,1],pc$x[,2],col=rep(pal[1:4],each=2),pch=rep(c(25,24),4),main="PCA of the exon usage",
     xlab=paste("PC1",percentage[1],"%"),
     ylab=paste("PC2",percentage[2],"%"))
text(pc$x[1,1],pc$x[1,2],colnames(df)[1],adj = c(-0.2,0),col=rep(pal[1:4],each=2)[1])
text(pc$x[c(2,4,5,7,8),1],pc$x[c(2,4,5,7,8),2],colnames(df)[c(2,4,5,7,8)],adj = c(-0.2,-1),col=rep(pal[1:4],each=2)[c(2,4,5,7,8)])
text(pc$x[c(3,6),1],pc$x[c(3,6),2],colnames(df)[c(3,6)],adj = c(1.2,1),col=rep(pal[1:4],each=2)[c(3,6)])

pc <- prcomp(df)
percentage <- round(summary(pc)$importance[2,]*100,digits=2)

mat <- sapply(
  lapply(lapply(dl,rownames),
         function(d){eoi %in% d}),as.integer)
fac <- factor(apply(mat,
  1,paste0,collapse=""))

fc <- sapply(seq(2,8,2),function(i){sign(df[,i]-df[,i-1])})

cex <- rep(24,length(fac))
cex[fac=="1000"][fc[fac=="1000",1] == -1] <- 25
cex[fac=="0100"][fc[fac=="0100",2] == -1] <- 25
cex[fac=="0010"][fc[fac=="0010",3] == -1] <- 25
cex[fac=="0001"][fc[fac=="0001",4] == -1] <- 25

plot(pc$x[,1],pc$x[,2],
     col=pal[as.integer(fac)],
     pch=cex)

col <- rep(0,length(fac))
col[fac=="1000"][fc[fac=="1000",1] == -1] <- pal[2]
col[fac=="1000"][fc[fac=="1000",1] == 1] <- pal[1]
col[fac=="0100"][fc[fac=="0100",2] == -1] <- pal[4]
col[fac=="0100"][fc[fac=="0100",2] == 1] <- pal[3]
col[fac=="0010"][fc[fac=="0010",3] == -1] <- pal[6]
col[fac=="0010"][fc[fac=="0010",3] == 1] <- pal[5]
col[fac=="0001"][fc[fac=="0001",4] == -1] <- pal[8]
col[fac=="0001"][fc[fac=="0001",4] == 1] <- pal[7]

plot(pc$x[,1],pc$x[,2],
     col=col,
     pch=cex)

legend("topright",colnames(df),col=pal,bty="n",pch=rep(c(24,25),4))

sels <- apply(mat,2,"==",1)

sapply(1:4,function(i){
  sel <- sels[,i]
  plot(pc$x[,1][sel],pc$x[,2][sel],
       col=ifelse(fc[sel,i]==-1,pal[i*2],pal[(i*2)-1]),
       pch=ifelse(fc[sel,i]==-1,24,25),
       xlim=range(pc$x[,1]),
       ylim=range(pc$x[,2]))
  legend("topright",colnames(df),col=pal,bty="n",pch=rep(c(25,24),4))
})

##      [,1]   [,2]   [,3]   [,4]  
## rect List,4 List,4 List,4 List,4
## text List,2 List,2 List,2 List,2

Fold changes

dfc <- sapply(list(dxr1,dxr1_23,dxr2,dxr3), function(d,eoi){
  d[match(eoi,rownames(d)),"log2FoldChange"]
},eoi)

colnames(dfc) <- c("pcp.vs.Col0.16",
                   "pcp.vs.Col0.23",
                  "23.vs.16.Col0",
                  "23.vs.16.pcp"
)

replace NAs

dfc[is.na(dfc)] <- 0

PCA

pc <- prcomp(t(dfc))
percentage <- round(summary(pc)$importance[2,]*100,digits=2)
plot(pc$x[,1],pc$x[,2],col=rep(pal[1:4]),
     main="PCA of the exon usage",
     xlab=paste("PC1",percentage[1],"%"),
     ylab=paste("PC2",percentage[2],"%"))
legend("center",colnames(dfc),col=pal[1:4],pch=18)

Heatmap

heatmap.2(as.matrix(df),scale="row",col=hpal,trace="none",
          labRow=FALSE,cexCol = 1)

heatmap.2(as.matrix(df[,5:8]),scale="row",col=hpal,trace="none",
          labRow=FALSE,cexCol = 1)

heatmap.2(as.matrix(df[,1:4]),scale="row",col=hpal,trace="none",
          labRow=FALSE,cexCol = 1)

Check the vst

vsd <- read.csv("/mnt/picea/projects/arabidopsis/mschmid/porcupine-RNA-Seq/analysis/HTSeq/library-size-normalized_variance-stabilized_data.csv",
                row.names = 1)

samples <- read.csv("~/Git/UPSCb/projects/arabidopsis-porcupine-RNA-Seq/doc/RNA-seq_pcp_vs_Col-0_info.csv")
samples$label <- paste(sub(" .*","",samples$Description),samples$Temperature.at.collection)


dat <- as.matrix(na.omit(vsd[unique(unlist(strsplit(sub(":E[0-9]+","",rownames(df)),"\\+"))),]))

heatmap.2(dat,scale="row",col=hpal,trace="none",
          labRow=FALSE,cexCol = 1,labCol = samples[match(sub("X","",colnames(vsd)),samples$SampleID),"label"])

#dat[dat < 1] <- 1
dat[dat > 7 ] <- 7

heatmap.2(dat,
          col=hpal,trace="none",
          labRow=FALSE,cexCol = 1,labCol = samples[match(sub("X","",colnames(vsd)),samples$SampleID),"label"])

Last check

nams <- intersect(rownames(dxr2),rownames(dxr3))
library(LSD)
heatscatter(dxr2[match(nams,rownames(dxr2)),4],dxr3[match(nams,rownames(dxr3)),4])

heatscatter(dxr2[match(nams,rownames(dxr2)),8],dxr3[match(nams,rownames(dxr3)),8],
            xlab="Col0 23 vs. 16",main="exon expression @ 16",
            ylab="pcp 23 vs. 16")
abline(1,1,col="grey",lwd=2,lty=2)

heatscatter(dxr2[match(nams,rownames(dxr2)),9],dxr3[match(nams,rownames(dxr3)),9],
            xlab="Col0 23 vs. 16",main="exon expression @ 23",
            ylab="pcp 23 vs. 16")
abline(1,1,col="grey",lwd=2,lty=2)

5 Session Info

## R version 3.4.0 (2017-04-21)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.2 LTS
## 
## Matrix products: default
## BLAS: /usr/lib/openblas-base/libblas.so.3
## LAPACK: /usr/lib/libopenblasp-r0.2.18.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
##  [1] grid      stats4    parallel  stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] LSD_3.0                    limma_3.32.2              
##  [3] gplots_3.0.1               VennDiagram_1.6.17        
##  [5] futile.logger_1.4.3        DEXSeq_1.22.0             
##  [7] RColorBrewer_1.1-2         AnnotationDbi_1.38.1      
##  [9] DESeq2_1.16.1              SummarizedExperiment_1.6.3
## [11] DelayedArray_0.2.7         matrixStats_0.52.2        
## [13] GenomicRanges_1.28.3       GenomeInfoDb_1.12.2       
## [15] IRanges_2.10.2             S4Vectors_0.14.3          
## [17] Biobase_2.36.2             BiocGenerics_0.22.0       
## [19] BiocParallel_1.10.1       
## 
## loaded via a namespace (and not attached):
##  [1] bit64_0.9-7             splines_3.4.0          
##  [3] gtools_3.5.0            Formula_1.2-1          
##  [5] statmod_1.4.30          latticeExtra_0.6-28    
##  [7] blob_1.1.0              GenomeInfoDbData_0.99.0
##  [9] Rsamtools_1.28.0        yaml_2.1.14            
## [11] RSQLite_2.0             backports_1.1.0        
## [13] lattice_0.20-35         digest_0.6.12          
## [15] XVector_0.16.0          checkmate_1.8.2        
## [17] colorspace_1.3-2        htmltools_0.3.6        
## [19] Matrix_1.2-10           plyr_1.8.4             
## [21] XML_3.98-1.9            biomaRt_2.32.1         
## [23] genefilter_1.58.1       zlibbioc_1.22.0        
## [25] xtable_1.8-2            scales_0.4.1           
## [27] gdata_2.18.0            htmlTable_1.9          
## [29] tibble_1.3.3            annotate_1.54.0        
## [31] ggplot2_2.2.1           nnet_7.3-12            
## [33] lazyeval_0.2.0          survival_2.41-3        
## [35] magrittr_1.5            memoise_1.1.0          
## [37] evaluate_0.10           hwriter_1.3.2          
## [39] foreign_0.8-68          tools_3.4.0            
## [41] data.table_1.10.4       stringr_1.2.0          
## [43] munsell_0.4.3           locfit_1.5-9.1         
## [45] cluster_2.0.6           lambda.r_1.1.9         
## [47] Biostrings_2.44.1       compiler_3.4.0         
## [49] caTools_1.17.1          rlang_0.1.1            
## [51] RCurl_1.95-4.8          htmlwidgets_0.8        
## [53] bitops_1.0-6            base64enc_0.1-3        
## [55] rmarkdown_1.6           gtable_0.2.0           
## [57] DBI_0.7                 gridExtra_2.2.1        
## [59] knitr_1.16              bit_1.1-12             
## [61] Hmisc_4.0-3             rprojroot_1.2          
## [63] futile.options_1.0.0    KernSmooth_2.23-15     
## [65] stringi_1.1.5           Rcpp_0.12.11           
## [67] geneplotter_1.54.0      rpart_4.1-11           
## [69] acepack_1.4.1